#!/usr/bin/env python
# coding: utf-8

# In[ ]:


import numpy as np
import math
import copy
import time
import cvxopt
import matplotlib.patches as patch
import matplotlib.pyplot as plt
#The class of Agent
class Agent(object):
    def __init__(self, state, actions, time_step,neighbors=None):
        # the location of agent
        self.state = np.array(state)
        # the trajectory record for agent
        self.traj = [np.array(state)]       
        # the time frequency for update e.g. 0.05s
        self.time_step = time_step          
    
        # Action Space 
        # the whole feasible actions for agent, i.e, [np.array([0,1]),np.array([0,2])]
        # np.array([0,1])indicates agent move toward direction np.array([0,1])
        self.actions = actions             
        # the number of all actions
        self.n_actions = len(self.actions)
        self.action_indices = np.array(range(self.n_actions))
        # the operated action
        self.next_action_index = 0
        # Neighbors {index_neighbor:weight_neighbor}
        self.neighbors = neighbors

    def motion_model(self, state, action):
        '''
        :param state: The current state at time t.
        :param action: The current action at time t.
        :return: The resulting state x_{t+1}
        '''
        return state+action*self.time_step
    
    def apply_next_action(self):
        """
        Applies the next action to modify the agent state.
        :param t: Time step.
        :return: None
        """
        self.state = self.motion_model(self.state, self.actions[self.next_action_index])

    def Initialize(self,N=16,Surrogate=True):
        """
        Intialize the action_prob_dist
        :param N: the number of agents
        :return: None
        """
        if Surrogate:
            self.action_prob_dist=(1/self.n_actions)*np.ones((N,self.n_actions))
            self.pre_prob_dist=(1/self.n_actions)*np.ones((N,self.n_actions))
        else:
            self.action_prob_dist=np.zeros((N,self.n_actions))
            self.pre_prob_dist=np.zeros((N,self.n_actions))
            
       
        
    def OSG_intialize(self,n_time_step=2000):
        self.J = math.ceil(math.log2(n_time_step)) # number of experts
        self.g = np.sqrt(math.log(self.J) / n_time_step) 
        self.beta = 1 / n_time_step
        self.gamma = [np.sqrt(math.log(self.n_actions * n_time_step) / 2 ** (j - 1)) for j in range(1, self.J + 1)]   # J x 1
        self.expert_weight =np.array([1.0 / self.J for i in range(self.J)])                                  # weights of experts        
        self.action_weight = np.array([[1.0 / self.n_actions for k in range(self.n_actions)] for i in range(self.J)]) # weights of actions for all experts        
        self.action_prob_dist = None
        self.loss =[0.0 for i in range(self.n_actions)]
  
    def get_losses0(self, agents_considered, targets):
        """
        Returns the losses of all possible actions based on the estimation result of just executed actions
        :return: The losses of all possible actions.
        """
        #agents_considered is the list of locations/states of agents considered
    
        losses = np.zeros(self.n_actions)
        # the loss f(A\cup\{a\})-f(A)
        if len(agents_considered) == 0:
            # f(empty set) = 0 before any agent's moving
            # f(a): distance with action a without any other agent's action
            obj_action = []
            for i in range(self.n_actions):
                action = self.actions[i] # self.action_indices is np.array()
                state = self.motion_model(self.state, action)
                # compute f(a)
                SUM=0 
                for tar in targets:
                    P=np.diag((tar.noise*tar.time_step)**2)
                    Information=2*np.diag(1/((tar.noise*tar.time_step)**2))
                    Matrix1=Information+(P+(state-tar.state).reshape(2,1).dot((state-tar.state).reshape(1,2)))/0.01
                    SUM+=(0.5*np.trace(P)-np.trace(np.linalg.inv(Matrix1)))
                obj_action.append(SUM)
            losses =np.array(obj_action)
        else:
            tar_matrix=[]
            for tar in targets:
                P=np.diag((tar.noise*tar.time_step)**2)
                Matrix1=np.zeros((2,2))
                for ag in agents_considered:
                    Matrix1+=(P+(ag-tar.state).reshape(2,1).dot((ag-tar.state).reshape(1,2)))/0.01
                Matrix1+=2*np.diag(1/((tar.noise*tar.time_step)**2))
                tar_matrix.append(Matrix1)
            length=len(targets)
            for i in range(self.n_actions):
                action = self.actions[i]
                state = self.motion_model(self.state, action)
                gap_obj = 0
                for j in range(length):
                    Matrix_new=tar_matrix[j]+(np.diag((targets[j].noise*targets[j].time_step)**2)+(state-targets[j].state).reshape(2,1).dot((state-targets[j].state).reshape(1,2)))/0.01
                    gap_obj+=np.trace(np.linalg.inv(tar_matrix[j]))-np.trace(np.linalg.inv(Matrix_new))
        

                losses[i]=gap_obj
        return losses
        
    
    
    def get_losses1(self, agents_considered,index,targets,agents):
        """
        Returns the losses of all possible actions based on the estimation result of just executed actions
        :return: The losses of all possible actions.
        """
        #agents_considered is the list of locations/states of agents considered
    
        losses = np.zeros(self.n_actions)
        # the loss f(A\cup\{a\})-f(A-\{a\})
        if len(agents_considered) == 0:
            # f(empty set) = 0 before any agent's moving
            # f(a): distance with action a without any other agent's action
            obj_action = []
            for i in range(self.n_actions):
                action = self.actions[i] # self.action_indices is np.array()
                state = self.motion_model(self.state, action)
                # compute f(a)
                SUM=0
                for tar in targets:
                    P=np.diag((tar.noise*tar.time_step)**2)
                    Information=2*np.diag(1/((tar.noise*tar.time_step)**2))
                    Matrix1=Information+(P+(state-tar.state).reshape(2,1).dot((state-tar.state).reshape(1,2)))/0.01
                    SUM+=(0.5*np.trace(P)-np.trace(np.linalg.inv(Matrix1)))
                obj_action.append(SUM)
            losses =np.array(obj_action)
        else:
            length=len(targets)
            Reduced_ag_state1=[self.motion_model(agents[k].state,ACTIONS[m]) for k,m in agents_considered if k!=index]
            if len(Reduced_ag_state1)>0:
                tar_matrix1=[]
                for tar in targets:
                    P=np.diag((tar.noise*tar.time_step)**2)
                    Matrix1=np.zeros((2,2))
                    for ag in Reduced_ag_state1:
                        Matrix1+=(P+(ag-tar.state).reshape(2,1).dot((ag-tar.state).reshape(1,2)))/0.01
                    tar_matrix1.append(Matrix1)
            else:
                tar_matrix1=[np.zeros((2,2)) for j in range(length)]
            for i in range(self.n_actions):
                Reduced_ag_state2=[self.motion_model(agents[k].state,ACTIONS[m]) for k,m in agents_considered if k==index and m!=i]
                if len(Reduced_ag_state2)>0:
                    tar_matrix2=[]
                    for tar in targets:
                        P=np.diag((tar.noise*tar.time_step)**2)
                        Matrix1=np.zeros((2,2))
                        for ag in Reduced_ag_state2:
                            Matrix1+=(P+(ag-tar.state).reshape(2,1).dot((ag-tar.state).reshape(1,2)))/0.01
                        tar_matrix2.append(Matrix1)
                else:
                    tar_matrix2=[np.zeros((2,2)) for j in range(length)]
                
                #f(A\cup\{a})
                action = self.actions[i]
                state = self.motion_model(self.state, action)
                # f(A_{i-1} U a)
                gap_obj=0 
                for j in range(length):
                    matrix_new1=2*np.diag(1/((targets[j].noise*targets[j].time_step)**2))+tar_matrix1[j]+tar_matrix2[j]
                    matrix_new2=matrix_new1+(np.diag((targets[j].noise*targets[j].time_step)**2)+(state-targets[j].state).reshape(2,1).dot((state-targets[j].state).reshape(1,2)))/0.01
                    gap_obj+=np.trace(np.linalg.inv(matrix_new1))-np.trace(np.linalg.inv(matrix_new2))
        
                losses[i] = gap_obj
        return losses

    def get_losses2(self, agents_considered,index,targets,agents):
        """
        Returns the losses of all possible actions based on the estimation result of just executed actions
        :return: The losses of all possible actions.
        """
        #agents_considered is the list of locations/states of agents considered
        
        losses = np.zeros(self.n_actions)
        # the loss f(A\cup\{a\})-f(A-\{a\})
        if len(agents_considered) == 0:
            # f(empty set) = 0 before any agent's moving
            # f(a): distance with action a without any other agent's action
            obj_action = []
            for i in range(self.n_actions):
                action = self.actions[i] # self.action_indices is np.array()
                state = self.motion_model(self.state, action)
                # compute f(a)
                SUM=0 
                for tar in targets:
                    P=np.diag((tar.noise*tar.time_step)**2)
                    Information=2*np.diag(1/((tar.noise*tar.time_step)**2))
                    Matrix1=Information+(P+(state-tar.state).reshape(2,1).dot((state-tar.state).reshape(1,2)))/0.01
                    SUM+=(0.5*np.trace(P)-np.trace(np.linalg.inv(Matrix1)))
                obj_action.append(SUM)
            losses =np.array(obj_action)
        else:
            length=len(targets)
            Reduced_ag_state1=[self.motion_model(agents[k].state,ACTIONS[m]) for k,m in agents_considered if k!=index]
            if len(Reduced_ag_state1)>0:
                tar_matrix1=[]
                for tar in targets:
                    P=np.diag((tar.noise*tar.time_step)**2)
                    Matrix1=np.zeros((2,2))
                    for ag in Reduced_ag_state1:
                        Matrix1+=(P+(ag-tar.state).reshape(2,1).dot((ag-tar.state).reshape(1,2)))/0.01
                    tar_matrix1.append(Matrix1)
            else:
                tar_matrix1=[np.zeros((2,2)) for j in range(length)]
            for i in range(self.n_actions):
                #f(A\cup\{a})
                action = self.actions[i]
                state = self.motion_model(self.state, action)
                # f(A_{i-1} U a)
                gap_obj=0 
                for j in range(length):
                    matrix_new1=2*np.diag(1/((targets[j].noise*targets[j].time_step)**2))+tar_matrix1[j]
                    matrix_new2=matrix_new1+(np.diag((targets[j].noise*targets[j].time_step)**2)+(state-targets[j].state).reshape(2,1).dot((state-targets[j].state).reshape(1,2)))/0.01
                    gap_obj+=np.trace(np.linalg.inv(matrix_new1))-np.trace(np.linalg.inv(matrix_new2))
        
                losses[i] = gap_obj
        return losses



    def update_experts(self):
        """
        Updates the parameters of experts after getting losses (from t to t + 1)
        :param t: The index of time step
        :return: None
        """
        for j in range(self.J):
            Index_Set=[self.gamma[j] * self.loss[i] for i in range(self.n_actions)]
            MAX=max(Index_Set)
            if MAX>700:
                Index_Set=[Index_Set[i]*700/MAX  for i in range(self.n_actions)]
            v = [self.action_weight[j][i] * np.exp(Index_Set[i]) for i in range(self.n_actions)]
            #################
            update1=np.dot(np.array(self.loss), np.array(self.action_weight[j]))
            self.action_weight[j]= [self.beta * np.sum(v) / self.n_actions + (1 - self.beta) * v[i] for i in range(self.n_actions)]
            self.expert_weight[j] = self.expert_weight[j] * np.exp(self.g *update1)
            

        self.action_weight= np.array([self.action_weight[j] / np.linalg.norm(self.action_weight[j], ord=1) for j in range(self.J)],dtype=np.float128)
        self.expert_weight= self.expert_weight/ np.linalg.norm(np.array(self.expert_weight), ord=1)
    
    def get_action_prob_dist(self):
        """
        Returns the output of FSF* (the predicted action probability distribution)
        :param t: The index of time step.
        :return: None.
        """
        q = np.array(self.expert_weight) # J x 1
        p = np.array(self.action_weight) # J x m
        self.action_prob_dist=np.dot(q, p).tolist() # m x 1, note that self.action_prob_dist is a list

